## Data Cleaning
### Cleaning and some basic exploration
# Load the CSV; its first row is treated as the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
dat <- read.csv("./CleanedMediumData.csv", header = TRUE)
# Quick sanity check: first rows and the (rows, columns) dimensions.
head(dat)
dim(dat)

# Split the data by column position: columns 1-55 go to datX and
# columns 55-57 go to datY.
# NOTE(review): column 55 is selected into BOTH datX and datY, so datY[, 1]
# is the same data as datX[, 55]. Confirm whether the ranges should be
# disjoint (e.g. 1:54 / 55:57 or 1:55 / 56:57) before modelling.
datX <- dat[, 1:55]
datY <- dat[, 55:57]


# Standardise every column of datX: subtract the column mean and divide by
# the column standard deviation. `TRUE` is used instead of the reassignable
# shorthand `T`.
datX0 <- scale(datX, center = TRUE, scale = TRUE)
head(datX0)

# Exploratory look at the first two columns of datY.
# The calls are kept as separate literal statements on purpose: the default
# axis labels and titles are built from the argument expression itself, so
# wrapping these in a loop or helper would change what the plots display.

# Values plotted in row order.
plot(datY[,1])
plot(datY[,2])

# Histograms of the two columns.
hist(datY[,1])
hist(datY[,2])

# Kernel density estimates of the two columns.
plot(density(datY[,1]))
plot(density(datY[,2]))

# Printed summaries of the two columns (auto-printed at top level).
summary(datY[,1])
summary(datY[,2])

# Scatter plots of standardised columns 1-53 of datX0 against the first two
# columns of datY. readline() pauses for <Enter> after each plot when run
# interactively. The index stays named `i` because the default x-axis label
# is taken from the expression `datX0[, i]`.
for (i in seq_len(53)) {
  plot(datX0[, i], datY[, 1], main = paste("X=", i, ",Y=1"))
  readline()
  plot(datX0[, i], datY[, 2], main = paste("X=", i, ",Y=2"))
  readline()
}

# Kernel density plot for each of standardised columns 1-53 of datX0,
# one at a time; readline() pauses for <Enter> between plots when run
# interactively.
for (i in seq_len(53)) {
  plot(density(datX0[, i]), main = paste("X=", i))
  readline()
}

#### Auto-correlation?
# Autocorrelation plot for each of standardised columns 1-53 of datX0,
# pausing for <Enter> between plots when run interactively. The index stays
# named `i` because acf() builds its default title from the argument
# expression.
for (i in seq_len(53)) {
  acf(datX0[, i])
  readline()
}

# Autocorrelation plots for the first two columns of datY.
acf(datY[, 1])
acf(datY[, 2])
    

#### Put data together
# Combine all columns of datX with the first two columns of datY into one
# modelling data frame, naming the two appended columns Y1 and Y2.
# NOTE(review): datY[, 1] is dat[, 55], which is also the last column of
# datX, so Y1 duplicates a predictor column in lmdat. Confirm this is
# intended before fitting models.
lmdat <- cbind(datX, datY[, c(1, 2)])
# The appended columns sit immediately after datX's columns (positions 56
# and 57 for the 55-column datX); derive the positions rather than
# hard-coding them, and use `<-` for assignment.
colnames(lmdat)[ncol(datX) + seq_len(2)] <- c("Y1", "Y2")
colnames(lmdat)

## Variable Selection
### Proposed Methods:
###  1. Bootstrapping columns (Select Different Variables)
###  2. LASSO (regularized regression)

library(caret)
